## MPs for Sale Replication replication
##
## Conservative MPs
## Estimand: ATE
## Dependent variable: Income 
##
## Reproduce: 
##      - Table 2 (bottom panel) 
##      - Figure A2 
##        (a) TASMD: conservative winners (treated)
##        (b) TASMD: conservative winners (control)
##        (c) KS:    conservative winners (treated)
##        (d) KS:    conservative winners (control)
##
## Kevin Quinn and Guoer Liu
## 11/19/2024

# setwd("") # set at your current working directory
set.seed(91800198)

source("func.R")
library(haven)
library(WeightIt)
library(Matching)
library(cem)
library(data.table)
library(RColorBrewer)
library(lattice)



mydata <- read_dta("mpsforsale.dta")
mydata <- as.data.frame(mydata)

## subset to just Conservative MPs
mydata <- mydata[mydata$tory==1, ]

## remove covariates due to multicolinearity
cov.names.all <- names(mydata)[grep("xx", names(mydata))]
cov.form <- as.formula(paste("lnrealgross", '~', 
                             paste(cov.names.all, collapse = "+")))
mod <- lm(cov.form, mydata)
cov.names.sub <- names(mod$coefficients)[!is.na(mod$coefficients)]
cov.names.sub <- cov.names.sub[-1] # remove intercept

mydata.all <- mydata[, c("sname", "fname", "labour", "tory", cov.names.all,
                     "treated", "lnrealgross")]
mydata.all <- na.omit(mydata.all)

mydata.sub <- mydata[, c("sname", "fname", "labour", "tory", cov.names.sub,
                     "treated", "lnrealgross")]
mydata.sub <- na.omit(mydata.sub)

## NOTE that there are 222 observations after listwise deletion
##      Catherine Morton (row 386 of the original data) is missing year of birth


## formula
treat.form.cov.sub <- as.formula(paste('treated', "~",
			paste(cov.names.sub, collapse = "+"))) # model treatment
treat.form.cov.all <- as.formula(paste('treated', "~",
			paste(cov.names.all, collapse = "+"))) # model treatment
out.form.cov.sub <- as.formula(paste("lnrealgross", '~', "treated", "+",
			paste(cov.names.sub, collapse = "+"))) # model outcome




estimator <- NULL
estimate <- NULL
SE <- NULL
Eff.n <- NULL
Eff.n.ratio <- NULL
DFBETA.min <- NULL
DFBETA.max <- NULL
DFBETA.min.who <- NULL
DFBETA.max.who <- NULL
extrap.treat <- NULL
extrap.control <- NULL



## #############################################################
## general regression (two regression models)
estimator <- c(estimator, "reg.general")
mydata.1 <- mydata.sub[mydata.sub$treated==1,]
mydata.0 <- mydata.sub[mydata.sub$treated==0,]

reg.formula <- as.formula(paste('lnrealgross', "~",
                                paste(cov.names.sub, collapse = "+"))) 
lm.out.1 <- lm(reg.formula, data=mydata.1)
lm.out.0 <- lm(reg.formula, data=mydata.0)

m1 <- predict(lm.out.1, newdata=mydata.sub)
m0 <- predict(lm.out.0, newdata=mydata.sub)

estimate <- c(estimate, mean(m1 - m0))


## SE via nonparametric bootstrap
B <- 5000
n <- nrow(mydata.sub)
estim.bs <- rep(NA, B)
for (b in 1:B){
    bs.inds <- sample(1:n, n, replace=TRUE)
    mydata.bs <- mydata.sub[bs.inds,]

    mydata.bs.1 <- mydata.bs[mydata.bs$treated==1,]
    mydata.bs.0 <- mydata.bs[mydata.bs$treated==0,]

    lm.out.1 <- lm(reg.formula, data=mydata.bs.1)
    lm.out.0 <- lm(reg.formula, data=mydata.bs.0)
    
    m1 <- predict(lm.out.1, newdata=mydata.bs)
    m0 <- predict(lm.out.0, newdata=mydata.bs)

    estim.bs[b] <- mean(m1 - m0)
}
SE <- c(SE, sd(estim.bs))


## get the WATE weights
X1 <- as.matrix(mydata.sub[mydata.sub$treated==1, cov.names.sub])
X1 <- cbind(1, X1)
X0 <- as.matrix(mydata.sub[mydata.sub$treated==0, cov.names.sub])
X0 <- cbind(1, X0)
n1 <- nrow(X1)
n0 <- nrow(X0)
n <- n1 + n0
treat.vec <- c(rep(1, n1), rep(0, n0))
y1 <- mydata.sub[mydata.sub$treated==1, "lnrealgross"]
y0 <- mydata.sub[mydata.sub$treated==0, "lnrealgross"]
X <- rbind(X1, X0)
y <- as.matrix(c(y1, y0), nrow=n, ncol=1)
ones.n <- matrix(1, nrow=n, ncol=1)

w1 <- t(ones.n) %*% X %*% solve(t(X1) %*% X1) %*% t(X1)
w0 <- t(ones.n) %*% X %*% solve(t(X0) %*% X0) %*% t(X0)
w <- as.vector(c(w1, w0))

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(y, treat.vec, w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

mydata.reg.ate <- rbind(mydata.1, mydata.0)
mydata.reg.ate$w <- w
mydata.reg.ate$DFBETA.out <- DFBETA.out

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.reg.ate[min.ind, "fname"],
                                          mydata.reg.ate[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.reg.ate[max.ind, "fname"],
                                          mydata.reg.ate[max.ind, "sname"]))

extrap.1 <- sum(abs(w1) * (w1 < 0)) / sum(w1 * (w1 >= 0))
extrap.0 <- sum(abs(w0) * (w0 < 0)) / sum(w0 * (w0 >= 0))

extrap.treat <- c(extrap.treat, extrap.1)
extrap.control <- c(extrap.control, extrap.0)




## END general regression (two regression models)
## #############################################################







## #############################################################

## general regression (constant effect)
estimator <- c(estimator, "reg.const.effect")
lm.out <- lm(out.form.cov.sub, data=mydata.sub)
estimate <- c(estimate, coef(lm.out)["treated"])

## SE via nonparametric bootstrap

B <- 5000
n <- nrow(mydata.sub)
estim.bs <- rep(NA, B)
for (b in 1:B){
    bs.inds <- sample(1:n, n, replace=TRUE)
    mydata.bs <- mydata.sub[bs.inds,]
    lm.out.bs <- lm(out.form.cov.sub, data=mydata.bs)
    estim.bs[b] <- coef(lm.out.bs)["treated"]
}
SE <- c(SE, sd(estim.bs))


## get the WATE weights
X1 <- as.matrix(mydata.sub[mydata.sub$treated==1, cov.names.sub])
X1 <- cbind(1, X1)
X0 <- as.matrix(mydata.sub[mydata.sub$treated==0, cov.names.sub])
X0 <- cbind(1, X0)
n1 <- nrow(X1)
n0 <- nrow(X0)
n <- n1 + n0
treat.vec <- c(rep(1, n1), rep(0, n0))
y1 <- mydata.sub[mydata.sub$treated==1, "lnrealgross"]
y0 <- mydata.sub[mydata.sub$treated==0, "lnrealgross"]
X <- rbind(X1, X0)
y <- as.matrix(c(y1, y0), nrow=n, ncol=1)
U <- cbind(X, treat.vec)
U1 <- cbind(X, 1)
U0 <- cbind(X, 0)
ones.n <- matrix(1, nrow=n, ncol=1)

w1 <- t(ones.n) %*% U1 %*% solve(t(U) %*% U) %*% t(U) -
    t(ones.n) %*% U0 %*% solve(t(U) %*% U) %*% t(U)
w0 <- t(ones.n) %*% U0 %*% solve(t(U) %*% U) %*% t(U) -
    t(ones.n) %*% U1 %*% solve(t(U) %*% U) %*% t(U)

w <- w0
w[1:n1] <- w1[1:n1]
w <- as.vector(w)
w1 <- w[1:n1]
w0 <- w[(n1+1):n]

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(y, treat.vec, w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)

mydata.reg_cons.ate <- rbind(mydata.1, mydata.0)
mydata.reg_cons.ate$w <- w
mydata.reg_cons.ate$DFBETA.out <- DFBETA.out


DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.reg_cons.ate[min.ind, "fname"],
                                          mydata.reg_cons.ate[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.reg_cons.ate[max.ind, "fname"],
                                          mydata.reg_cons.ate[max.ind, "sname"]))

extrap.1 <- sum(abs(w1) * (w1 < 0)) / sum(w1 * (w1 >= 0))
extrap.0 <- sum(abs(w0) * (w0 < 0)) / sum(w0 * (w0 >= 0))

extrap.treat <- c(extrap.treat, extrap.1)
extrap.control <- c(extrap.control, extrap.0)


## END general regression (constant effect)
## #############################################################






## #############################################################

## propensity score weighting (logit)
estimator <- c(estimator, "sipw.logit")

glm.out <- glm(treat.form.cov.sub, data=mydata.sub, family=binomial(logit))
mydata.sub$pscores <- glm.out$fitted
mydata.sub$w <- 1 / mydata.sub$pscores
mydata.sub$w[mydata.sub$treated==0] <- 1 /
    (1 - mydata.sub$pscores[mydata.sub$treated==0])

ATE.hat <- sum(mydata.sub$w * mydata.sub$treated * mydata.sub$lnrealgross) /
    sum(mydata.sub$w * mydata.sub$treated) -
    sum(mydata.sub$w * (1-mydata.sub$treated) * mydata.sub$lnrealgross) /
    sum(mydata.sub$w * (1-mydata.sub$treated))

estimate <- c(estimate, ATE.hat)


## SE via nonparametric bootstrap
B <- 5000
n <- nrow(mydata.sub)
estim.bs <- rep(NA, B)
for (b in 1:B){
    bs.inds <- sample(1:n, n, replace=TRUE)
    mydata.bs <- mydata.sub[bs.inds,]

    glm.out.bs <- glm(treat.form.cov.sub, data=mydata.bs,
                      family=binomial(logit))
    mydata.bs$pscores <- glm.out.bs$fitted
    mydata.bs$w <- 1 / mydata.bs$pscores
    mydata.bs$w[mydata.bs$treated==0] <- 1 /
        (1 - mydata.bs$pscores[mydata.bs$treated==0])
    
    ATE.hat.bs <- sum(mydata.bs$w * mydata.bs$treated * mydata.bs$lnrealgross) /
        sum(mydata.bs$w * mydata.bs$treated) -
        sum(mydata.bs$w * (1-mydata.bs$treated) * mydata.bs$lnrealgross) /
        sum(mydata.bs$w * (1-mydata.bs$treated))
    
    
    estim.bs[b] <- ATE.hat.bs
}
SE <- c(SE, sd(estim.bs))


w0 <- mydata.sub$w[mydata.sub$treated==0]
w1 <- mydata.sub$w[mydata.sub$treated==1]
n1 <- sum(mydata.sub$treated)
n <- nrow(mydata.sub)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata.sub$lnrealgross, mydata.sub$treated,
                              mydata.sub$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.sub[min.ind, "fname"],
                                          mydata.sub[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.sub[max.ind, "fname"],
                                          mydata.sub[max.ind, "sname"]))


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)


mydata.sipw.logit.ate <- mydata.sub
mydata.sipw.logit.ate$DFBETA.out <- DFBETA.out

## END propensity score weighting (logit)
## #############################################################









## #############################################################
## entropy balancing
estimator <- c(estimator, "entropy.balancing")


eb.out <- weightit(treat.form.cov.all, data=mydata.all, method="ebal",
                   estimand="ATE")
mydata.all$w <- eb.out$weights

wls.out <- lm(lnrealgross ~ treated, weights=w, data=mydata.all)

ATE.hat <- coef(wls.out)[2]

estimate <- c(estimate, ATE.hat)
SE <- c(SE, sqrt(vcov(wls.out)[2,2]))


w0 <- mydata.all$w[mydata.all$treated==0]
w1 <- mydata.all$w[mydata.all$treated==1]
n1 <- sum(mydata.all$treated)
n <- nrow(mydata.all)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata.all$lnrealgross, mydata.all$treated,
                              mydata.all$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.all[min.ind, "fname"],
                                          mydata.all[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.all[max.ind, "fname"],
                                          mydata.all[max.ind, "sname"]))


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)


mydata.entropy.balancing.ate <- mydata.all
mydata.entropy.balancing.ate$DFBETA.out <- DFBETA.out



## END entropy balancing
## #############################################################




## #############################################################
## genetic matching
estimator <- c(estimator, "genetic.matching")

treat.vec <- mydata.all$treated
X <- as.matrix(mydata.all[, cov.names.all])
y <- mydata.all$lnrealgross

GenMatch.out <- GenMatch(Tr = treat.vec,
                         X = X,
                         estimand = "ATE",
                         M = 1, ties=FALSE,
                         replace = TRUE,
                         pop.size = 50000)

match.out <- Match(Y = y, Tr=treat.vec, X=X, estimand="ATE",
                   M=1, ties=FALSE, replace=TRUE, BiasAdjust=FALSE,
                   Weight.matrix=GenMatch.out$Weight.matrix)

estimate <- c(estimate, match.out$est)
SE <- c(SE, match.out$se.standard)

mydata.all$w <- 0
for (i in 1:nrow(mydata.all)){
     mydata.all$w[i] <- sum(match.out$index.control == i) +
            sum(match.out$index.treated == i)
}

w0 <- mydata.all$w[mydata.all$treated==0]
w1 <- mydata.all$w[mydata.all$treated==1]
n1 <- sum(mydata.all$treated)
n <- nrow(mydata.all)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata.all$lnrealgross, mydata.all$treated,
                              mydata.all$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.all[min.ind, "fname"],
                                          mydata.all[min.ind, "sname"]))

DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.all[max.ind, "fname"],
                                          mydata.all[max.ind, "sname"]))


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)

mydata.genMatching.ate <- mydata.all
mydata.genMatching.ate$DFBETA.out <- DFBETA.out


## END genetic matching
## #############################################################




### Table 2 (top panel) ###
out.data.ate <- data.frame(estimator=estimator,
                       estimate=estimate,
                       SE=SE,
                       #Z = estimate / SE,
                       Eff.n=Eff.n,
                       Eff.n.ratio=Eff.n.ratio,
                       DFBETA.min=DFBETA.min,
                       DFBETA.min.who=DFBETA.min.who,
                       DFBETA.max=DFBETA.max,
                       DFBETA.max.who=DFBETA.max.who,
                       extrap.control=extrap.control,
                       extrap.treat=extrap.treat)

print(out.data.ate)
#library(xtable)
#xtable(out.data.ate, digits=c(0, 0, rep(3, 2), 1, rep(3, 2), 0, 3, 0, rep(3, 2)))


## #############################################################
## FIGURES 

mydata.raw <- mydata.sub 
mydata.raw$w <- 1

data.list <- list(mydata.raw,
                  mydata.reg.ate,
                  mydata.reg_cons.ate,
                  mydata.sipw.logit.ate,
                  mydata.entropy.balancing.ate,
                  mydata.genMatching.ate)

TASMD.data.1 <- as.data.frame(matrix(nrow = length(cov.names.sub), 
                                     ncol = length(data.list)))

TASMD.data.0 <- as.data.frame(matrix(nrow = length(cov.names.sub), 
                                     ncol = length(data.list)))

colnames.var <- c("Raw Data",
                  "Regression (MRI)",
                  "Regression (URI)",
                  "PScores",
                  "Ebal",
                  "GenMatch")

colnames(TASMD.data.1) <- colnames.var
rownames(TASMD.data.1) <- cov.names.sub

colnames(TASMD.data.0) <- colnames.var
rownames(TASMD.data.0) <- cov.names.sub

## treated & control separately
for(i in 1:length(data.list)){
    mydata <- data.list[[i]]
    for (xvar in cov.names.sub){
        tasmd <- TASMD.ATE(x=mydata[,xvar], w=mydata$w,
                       treatment=mydata$treated)
        TASMD.data.1[xvar, i] <- tasmd$TASMD.1 # treated
        TASMD.data.0[xvar, i] <- tasmd$TASMD.0 # control
    }
}



xvar.names <- c("Birth Year", "Death Year", 
                "Schooling: Public", "Schooling: Eton", 
                "Schooling: Regular",
                "University: Oxbridge", "University: Degree",
                "Aristocrat", "Female",
                "Teacher", "Barrister", "Solicitor",
                "Doctor", "Civil Servant", "Local Politician",
                "Business", "White Collar", "Journalist")


TASMD.data.1$xvar <- xvar.names
TASMD.data.0$xvar <- xvar.names

TASMD.long.1 <- melt(setDT(TASMD.data.1),
                    id.vars="xvar", variable.name = "method")
TASMD.long.0 <- melt(setDT(TASMD.data.0),
                    id.vars="xvar", variable.name = "method")


colors <- brewer.pal(7, "Dark2")
colors <- replace(colors, c(1, 2), colors[c(2, 1)])

### Figure A2 (a) TASMD: Conservative Winners (treated)


#pdf("TASMD-ConservativeMPs-ate-treated.pdf", 
#    height=7, width=6, family = "Times")
trell.par <- trellis.par.get() ## get default settings
trellis.par.set(superpose.symbol =
                    list(pch = c(16, 0:3, 18, 4), 
                         cex = 1,
                         col = colors))
TASMD.long.1$xvar <- factor(TASMD.long.1$xvar, 
                            levels = unique(TASMD.long.1$xvar)) # ylab order
print(dotplot(xvar ~ value, data = TASMD.long.1,
              groups = method,
              xlab = "TASMD (Treated)",
              scales = list(y = list(cex = 1.2), 
                            x = list(cex = 1.2, 
                                     limits = c(-0.05, 0.5), 
                                     at = seq(0, 0.4, by = 0.1))),
              panel = function(x, y, ...) {
                  panel.abline(h = 1:length(unique(TASMD.long.1$xvar)), 
                               col = "grey95", ...)
                  panel.abline(v = 0.1, col = "grey85")
                  panel.xyplot(x, y, ...)
              } # remove legends for the treated and only show in the control 
))
#dev.off()

### Figure A2 (b) TASMD: Conservative Losers (control)


#pdf("TASMD-ConservativeMPs-ate-control.pdf", 
#    height=7, width=6, family = "Times")
trell.par <- trellis.par.get() ## get default settings
trellis.par.set(superpose.symbol =
                    list(pch = c(16, 0:3, 18, 4), 
                         cex = 1,
                         col = colors))
TASMD.long.0$xvar <- factor(TASMD.long.0$xvar, 
                            levels = unique(TASMD.long.0$xvar))

print(dotplot(xvar ~ value, data = TASMD.long.0,
              groups = method,
              xlab = "TASMD (Control)",
              scales = list(y = list(draw = FALSE, cex = 1.2), ## Remove ylab
                            x = list(cex = 1.2, 
                                     limits = c(-0.05, 0.5), 
                                     at = seq(0, 0.4, by = 0.1))), 
              panel = function(x, y, ...) {
                  panel.abline(h = 1:length(unique(TASMD.long.0$xvar)), col = "grey95", ...)
                  panel.abline(v = 0.1, col = "grey85")
                  panel.xyplot(x, y, ...)
              },
              key = simpleKey(levels(TASMD.long.0$method),
                              space = "right")
))
trellis.par.set(trell.par)  ## go back to default
#dev.off()




### Figure A2: KS ###


KS.data.1 <- as.data.frame(matrix(nrow = length(cov.names.sub), 
                                  ncol = length(data.list)))

KS.data.0 <- as.data.frame(matrix(nrow = length(cov.names.sub), 
                                  ncol = length(data.list)))

colnames(KS.data.1) <- colnames.var
rownames(KS.data.1) <- cov.names.sub

colnames(KS.data.0) <- colnames.var
rownames(KS.data.0) <- cov.names.sub

## treated & control separately
for(i in 1:length(data.list)){
    mydata <- data.list[[i]]
    for (xvar in cov.names.sub){
        ks <- wtd.ks.ate(x = mydata[,xvar], w=mydata$w,
                     treatment=mydata$treated)
        KS.data.1[xvar, i] <- ks$KS.weighted.1targ # treated
        KS.data.0[xvar, i] <- ks$KS.weighted.0targ # control
    }
}


KS.data.1$xvar <- xvar.names
KS.data.0$xvar <- xvar.names

KS.long.1 <- melt(setDT(KS.data.1),
                    id.vars="xvar", variable.name = "method")
KS.long.0 <- melt(setDT(KS.data.0),
                    id.vars="xvar", variable.name = "method")


colors <- brewer.pal(7, "Dark2")
colors <- replace(colors, c(1, 2), colors[c(2, 1)])

### Figure A2 (c) KS: Conservative Winners (treated)


#pdf("KS-ConservativeMPs-ate-treated.pdf", 
#    height=7, width=6, family = "Times")
trell.par <- trellis.par.get() ## get default settings
trellis.par.set(superpose.symbol =
                    list(pch = c(16, 0:3, 18, 4), 
                         cex = 1,
                         col = colors))
KS.long.1$xvar <- factor(KS.long.1$xvar, 
                            levels = unique(KS.long.1$xvar)) # ylab order
print(dotplot(xvar ~ value, data = KS.long.1,
              groups = method,
              xlab = "KS Statistics (Treated)",
              scales = list(y = list(cex = 1.2), 
                            x = list(cex = 1.2, 
                                     limits = c(-0.01, 0.11), 
                                     at = seq(0, 0.1, by = 0.02))),
              panel = function(x, y, ...) {
                  panel.abline(h = 1:length(unique(KS.long.1$xvar)), 
                               col = "grey95", ...)
                  panel.abline(v = 0, col = "grey85")
                  panel.xyplot(x, y, ...)
              } # remove legends for the treated and only show in the control 
))
#dev.off()

### Figure A2 (D) KS: Conservative Winners (control)

#pdf("KS-ConservativeMPs-ate-control.pdf", 
#    height=7, width=6, family = "Times")
trell.par <- trellis.par.get() ## get default settings
trellis.par.set(superpose.symbol =
                    list(pch = c(16, 0:3, 18, 4), 
                         cex = 1,
                         col = colors))
KS.long.0$xvar <- factor(KS.long.0$xvar, 
                            levels = unique(KS.long.0$xvar))

print(dotplot(xvar ~ value, data = KS.long.0,
              groups = method,
              xlab = "KS Statistics (Control)",
              scales = list(y = list(draw = FALSE, cex = 1.2), ## Remove ylab
                            x = list(cex = 1.2, 
                                     limits = c(-0.01, 0.11), 
                                     at = seq(0, 0.1, by = 0.02))), 
              panel = function(x, y, ...) {
                  panel.abline(h = 1:length(unique(KS.long.0$xvar)), col = "grey95", ...)
                  panel.abline(v = 0, col = "grey85")
                  panel.xyplot(x, y, ...)
              },
              key = simpleKey(levels(KS.long.0$method),
                              space = "right")
))
trellis.par.set(trell.par)  ## go back to default
#dev.off()

